浏览量 34624
2019/11/15 12:14
Python 截取长网页:将整页截图保存至本地,然后通过邮件发送
准备工作 安装依赖
yum install chromedriver
yum install https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
pip install selenium
核心代码
#!/bin/python
# coding=utf-8
# author: wz
# mail: 277215243@qq.com
# datetime:2019/10/15 12:42 PM
# web: https://www.bthlt.com
from selenium import webdriver
import time
import os.path
import multiprocessing as mp
from selenium.webdriver.chrome.options import Options
from email import encoders
from email.mime.base import MIMEBase
from email.header import Header
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.image import MIMEImage
# Mail settings read by send_mail() and the __main__ block below.
# Recipient addresses; passed to send_mail() as its to_list argument.
mailto_list = ['277215243@qq.com']
# SMTP server that send_mail() connects to.
mail_host = 'smtp.163.com'
# Credentials passed to server.login(); the values are masked in this post.
# NOTE(review): credentials are hard-coded in the script — consider loading
# them from the environment or a config file instead.
mail_user = '******@163.com'
mail_pass = '******'
# Domain that send_mail() appends to mail_user when building the From address.
mail_postfix = '163.com'
def webshot():
    """Capture a full-page screenshot of the site and save it under /data/www/.

    Starts a headless Chrome session, scrolls the page in 800px steps
    (re-reading the page height after each step), resizes the window to the
    document's full scroll size and writes the screenshot to
    /data/www/tbc.png.

    Errors are printed rather than raised, and the browser session is always
    shut down so that no Chrome/chromedriver process is left behind.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('lang=zh_CN.UTF-8')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(options=chrome_options)

    js_height = "return document.body.clientHeight"
    picname = 'tbc.png'
    link = 'https://www.bthlt.com/'
    try:
        driver.maximize_window()
        driver.get(link)

        # Scroll down step by step; the height is re-read after every step,
        # so the loop keeps going if the page grows while scrolling.
        k = 1
        height = driver.execute_script(js_height)
        while k * 800 < height:
            js_move = "window.scrollTo(0,{})".format(k * 800)
            print(js_move)
            driver.execute_script(js_move)
            time.sleep(0.2)
            height = driver.execute_script(js_height)
            k += 1

        # Resize the window to the whole document so that a single
        # screenshot covers the full page.
        scroll_width = driver.execute_script(
            'return document.body.parentNode.scrollWidth')
        scroll_height = driver.execute_script(
            'return document.body.parentNode.scrollHeight')
        driver.set_window_size(scroll_width, scroll_height)

        # get_screenshot_as_file() returns False when the file could not be
        # written, so only report success when it actually was.
        if driver.get_screenshot_as_file("/data/www/" + picname):
            print("Process {} get one pic !!!".format(os.getpid()))
        else:
            print(picname, "screenshot could not be written")
        time.sleep(3)
    except Exception as e:
        print(picname, e)
    finally:
        # Always end the session; otherwise every run leaks a headless
        # Chrome and chromedriver process.
        driver.quit()
def send_mail(to_list, sub):
    """Send the saved screenshot by e-mail.

    Builds a multipart message with an HTML body that shows
    /data/www/tbc.png inline (referenced through Content-ID 0) and also
    carries it as an attachment, then sends it through mail_host using the
    mail_user / mail_pass credentials.

    Args:
        to_list: list of recipient e-mail addresses.
        sub: subject line of the message.

    Returns:
        True when the message was handed to the SMTP server, False when the
        screenshot could not be read or the SMTP exchange failed (the error
        is printed).
    """
    # mail_user may already be a full address; only append the domain when
    # it is a bare user name, so "user@163.com@163.com" is never produced.
    if '@' in mail_user:
        sender_addr = mail_user
    else:
        sender_addr = mail_user + '@' + mail_postfix
    # RFC 2047-encode the non-ASCII display name and keep the address itself
    # plain ASCII so that mail servers can still parse it.
    me = Header('葫芦', 'utf-8').encode() + ' <' + sender_addr + '>'

    msg = MIMEMultipart()
    msg['Subject'] = Header(sub, 'utf-8')
    msg['From'] = me
    # Address lists in headers are comma-separated (RFC 5322), not ';'.
    msg['To'] = ', '.join(to_list)
    body = """
<html lang="en">
<body>
<h1>脚本网页截图</h1>
<hr />
<br />
<a href="https://www.bthlt.com">葫芦的运维日志</a>
<br />
<img src="cid:0">
</body>
</html>
"""
    msg.attach(MIMEText(body, 'html', 'utf-8'))

    # Read the screenshot; a missing or unreadable file is reported as a
    # failed send instead of raising out of the function.
    try:
        with open('/data/www/tbc.png', 'rb') as f:
            payload = f.read()
    except (IOError, OSError) as e:
        print(e)
        return False

    mime = MIMEBase('image', 'png', filename='tbc.png')
    mime.add_header('Content-Disposition', 'attachment', filename='tbc.png')
    # The Content-ID is what the <img src="cid:0"> tag in the body refers to.
    mime.add_header('Content-ID', '<0>')
    mime.add_header('X-Attachment-Id', '0')
    mime.set_payload(payload)
    encoders.encode_base64(mime)
    msg.attach(mime)

    server = smtplib.SMTP()
    try:
        server.connect(mail_host)
        server.login(mail_user, mail_pass)
        # The envelope sender is the bare address, without the display name.
        server.sendmail(sender_addr, to_list, msg.as_string())
        return True
    except Exception as e:
        print(e)
        return False
    finally:
        # close() is safe on an unconnected client, so the socket is
        # released on both the success and the failure path.
        server.close()
if __name__ == '__main__':
    # Remember the start time so the total run time can be reported.
    started_at = time.time()

    # Take the screenshot first, then mail it to the configured recipients.
    webshot()
    sent = send_mail(mailto_list, '脚本网页截图 并发送邮件')
    print('发送成功' if sent else '发送失败')

    elapsed = float(time.time() - started_at)
    print("操作结束,耗时:{:.2f}秒".format(elapsed))
-rw-r--r-- 1 root root 2.5M Nov 15 12:46 /data/www/tbc.png
结果
待解决
截图中的中文显示为乱码,该问题暂未解决
补充已解决
将本地的中文字体文件上传至服务器的 /usr/share/fonts 目录(必要时执行 fc-cache -fv 刷新字体缓存),即可解决截图中的中文乱码问题
上一篇 搜索 下一篇